# Load the combined college stats / shot-location table.
df <- read_csv(file = "./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Remove the two columns the analysis does not use, then list what is left.
df <- select(df, !c("games_started", "pf_per_g"))
colnames(df)
##  [1] "player"         "dunk_tot"       "dunk_pct"       "rim_tot"       
##  [5] "rim_pct"        "rim_asted"      "other2pt_tot"   "other2pt_pct"  
##  [9] "other2pt_asted" "3pt_tot"        "3pt_pct"        "3pt_asted"     
## [13] "games"          "mp_per_g"       "fg_per_g"       "fga_per_g"     
## [17] "fg_pct"         "fg2_per_g"      "fg2a_per_g"     "fg2_pct"       
## [21] "fg3_per_g"      "fg3a_per_g"     "fg3_pct"        "ft_per_g"      
## [25] "fta_per_g"      "ft_pct"         "orb_per_g"      "drb_per_g"     
## [29] "trb_per_g"      "ast_per_g"      "stl_per_g"      "blk_per_g"     
## [33] "tov_per_g"      "pts_per_g"

Plot 2pt attempts to 3pt attempts

From success script

# Location of the draft data set, kept in `path` so it can be changed in one
# place.
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(file = path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Columns of the draft data that are removed before the analysis.
drop_cols <- c("team_id", "skip", "mp", "pts", "trb", "ast")

# `drop_cols` is an ordinary character vector, not a column of the data, so it
# is wrapped in all_of(): passing a bare external vector to select() is
# deprecated since tidyselect 1.1.0 and would be ambiguous if the data ever
# gained a column named `drop_cols`. all_of() also errors if a listed column
# is missing instead of silently ignoring it.
df_career_stats <- df_career_stats |> select(!all_of(drop_cols))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(drop_cols)
## 
##   # Now:
##   data %>% select(all_of(drop_cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Keep only the players whose overall pick number is below 15.
df_lot_picks <- filter(df_career_stats, pick_overall < 15)

# Columns that remain after the drop above.
colnames(df_career_stats)
##  [1] "pick_overall" "player"       "college_name" "seasons"      "g"           
##  [6] "fg_pct"       "fg3_pct"      "ft_pct"       "mp_per_g"     "pts_per_g"   
## [11] "trb_per_g"    "ast_per_g"    "ws"           "ws_per_48"    "bpm"         
## [16] "vorp"         "year"
# Add points + rebounds + assists (PRA) per game, and turn the pick number
# into a factor so that every draft slot is handled as its own category.
df_lot_picks <- mutate(
  df_lot_picks,
  pra_per_g = pts_per_g + trb_per_g + ast_per_g,
  pick_overall = factor(pick_overall)
)

# Average minutes, points, rebounds, assists and PRA per game for each
# draft slot.
draft_means <- df_lot_picks |>
  group_by(pick_overall) |>
  summarize(
    avg_mpg = mean(mp_per_g),
    avg_ppg = mean(pts_per_g),
    avg_trbpg = mean(trb_per_g),
    avg_apg = mean(ast_per_g),
    avg_prapg = mean(pra_per_g)
  )

# Show all 14 rows rather than the default truncated tibble print.
print(draft_means, n = 14)
## # A tibble: 14 × 6
##    pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
##    <fct>          <dbl>   <dbl>     <dbl>   <dbl>     <dbl>
##  1 1               31.2   18.8       6.52    4.32      29.7
##  2 2               26.9   13.7       4.96    2.88      21.6
##  3 3               30.0   17.6       6.3     3.52      27.4
##  4 4               27.5   12.8       5.39    2.48      20.7
##  5 5               26.0   12.7       4.92    3.55      21.2
##  6 6               23.2   10.2       4.35    2.26      16.8
##  7 7               27.6   13.4       4.88    2.77      21.0
##  8 8               21.1    8.82      3.37    1.77      14.0
##  9 9               24.5   10.9       4.55    2.52      18.0
## 10 10              23.5   10.4       3.61    2.31      16.3
## 11 11              21.1   10.2       3.69    2.16      16.0
## 12 12              24.6   10.6       4.47    2.23      17.3
## 13 13              22.6   10.9       3.99    2.23      17.1
## 14 14              20.3    8.78      3.75    1.3       13.8
# Box plot of PRA per game, one box per draft slot.
ggplot(df_lot_picks, aes(x = pick_overall, y = pra_per_g)) +
  geom_boxplot() +
  labs(x = "Draft Pick", y = "Points-Rebounds-Assists Per Game")

Plotting the players in the top 25% in PRA per game

# Build the combined table in one pipeline:
#   1. attach the NBA stats to the college table by player name; columns that
#      exist in both tables get a "_college" / "_nba" suffix,
#   2. split each "made-attempts" text column into two columns,
#   3. convert the six new columns from text to numbers.
df2 <- df |>
  left_join(df_lot_picks, by = "player", suffix = c("_college", "_nba")) |>
  separate_wider_delim(
    dunk_tot,
    delim = "-",
    names = c("dunk_made", "dunk_attempts")
  ) |>
  separate_wider_delim(
    rim_tot,
    delim = "-",
    names = c("rim_made", "rim_attempts")
  ) |>
  separate_wider_delim(
    other2pt_tot,
    delim = "-",
    names = c("other2pt_made", "other2pt_attempts")
  ) |>
  mutate(
    across(
      c(
        dunk_made, dunk_attempts,
        rim_made, rim_attempts,
        other2pt_made, other2pt_attempts
      ),
      as.numeric
    )
  )

Defining the bust metric

# VORP divided by games played (`g`).
df2 <- mutate(df2, vorp_per_g = vorp / g)

# 70th percentile of each NBA stat within every draft slot.
df_top_players <- df2 |>
  group_by(pick_overall) |>
  summarize(
    across(
      c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
      \(x) quantile(x, probs = 0.7)
    )
  )

# 30th percentile of the same stats within every draft slot.
df_bottom_players <- df2 |>
  group_by(pick_overall) |>
  summarize(
    across(
      c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
      \(x) quantile(x, probs = 0.3)
    )
  )

# The metric favors big men, so the rebound cutoff uses a higher percentile
# than the cutoffs for the other stats.

# Select the players drafted at `pick_number` who were not busts.
#
# A player is kept when all three conditions hold:
#   * at least one of PRA, assists, rebounds or points per game reaches its
#     cutoff for this draft slot,
#   * VORP per game is at least the median VORP per game of this draft slot,
#   * the player has played at least 4/5 of the seasons between `year` (the
#     draft year) and `current_year`.
# Rows for which these criteria evaluate to NA are dropped.
#
# Arguments:
#   pick_number    Draft slot to evaluate, e.g. 1. It is matched against
#                  `pick_overall` by value, so it works whether that column
#                  is numeric or a factor.
#   df_top_players One row per `pick_overall` holding the cutoffs in
#                  `pts_per_g_nba`, `ast_per_g_nba` and `pra_per_g`.
#   df2            Player-level table with `pick_overall`, `pra_per_g`,
#                  `ast_per_g_nba`, `trb_per_g_nba`, `pts_per_g_nba`,
#                  `vorp_per_g`, `seasons` and `year`.
#   current_year   Year used to count the seasons since the draft
#                  (default 2023, the value that used to be hard-coded).
#
# Returns the qualifying rows of `df2` (zero rows when nobody in `df2` was
# drafted at `pick_number`).
#
# `df_top_players` and `df2` are required: the former self-referential
# defaults (`df2 = df2`) could never be evaluated and always raised an error.
is_not_bust <- function(pick_number, df_top_players, df2,
                        current_year = 2023) {
  stopifnot(
    "`df2` is missing required columns" = all(
      c(
        "pick_overall", "pra_per_g", "ast_per_g_nba", "trb_per_g_nba",
        "pts_per_g_nba", "vorp_per_g", "seasons", "year"
      ) %in% names(df2)
    ),
    "`df_top_players` is missing required columns" = all(
      c("pick_overall", "pra_per_g", "ast_per_g_nba", "pts_per_g_nba") %in%
        names(df_top_players)
    )
  )

  # Match the draft slot by value. Indexing the cutoffs by position picks the
  # wrong row (or fails) as soon as one draft slot is absent from the table.
  at_pick <- function(data) {
    hit <- as.character(data$pick_overall) == as.character(pick_number)
    data[which(hit), , drop = FALSE]
  }

  drafted <- at_pick(df2)
  if (nrow(drafted) == 0L) {
    return(drafted)
  }

  cutoffs <- at_pick(df_top_players)
  if (nrow(cutoffs) != 1L) {
    stop(
      "`df_top_players` must contain exactly one row for pick ", pick_number,
      call. = FALSE
    )
  }

  # Rebounds use the 80th percentile of this draft slot, a stricter cutoff
  # than the one supplied for the other stats; VORP uses the slot's median.
  rebound_cutoff <- quantile(
    drafted$trb_per_g_nba,
    probs = 0.8, na.rm = TRUE, names = FALSE
  )
  vorp_cutoff <- median(drafted$vorp_per_g, na.rm = TRUE)

  clears_a_stat <- drafted$pra_per_g >= cutoffs$pra_per_g |
    drafted$ast_per_g_nba >= cutoffs$ast_per_g_nba |
    drafted$trb_per_g_nba >= rebound_cutoff |
    drafted$pts_per_g_nba >= cutoffs$pts_per_g_nba
  clears_vorp <- drafted$vorp_per_g >= vorp_cutoff
  # Must also have played most of the seasons since being drafted.
  played_enough <- drafted$seasons >= 4 / 5 * (current_year - drafted$year)

  # which() drops rows whose criteria are NA, like dplyr::filter() does.
  drafted[which(clears_a_stat & clears_vorp & played_enough), , drop = FALSE]
}

# Select the players drafted at `pick_number` who count as busts.
#
# A player is a bust when either condition holds:
#   * PRA, assists, rebounds and points per game are all below their cutoffs
#     for this draft slot and VORP per game is below the slot's median, or
#   * the player has played fewer than half of the seasons between `year`
#     (the draft year) and `current_year`.
# Rows for which these criteria evaluate to NA are dropped.
#
# Arguments:
#   pick_number       Draft slot to evaluate, e.g. 1. It is matched against
#                     `pick_overall` by value, so it works whether that
#                     column is numeric or a factor.
#   df_bottom_players One row per `pick_overall` holding the cutoffs in
#                     `pts_per_g_nba`, `ast_per_g_nba` and `pra_per_g`.
#   df2               Player-level table with `pick_overall`, `pra_per_g`,
#                     `ast_per_g_nba`, `trb_per_g_nba`, `pts_per_g_nba`,
#                     `vorp_per_g`, `seasons` and `year`.
#   current_year      Year used to count the seasons since the draft
#                     (default 2023, the value that used to be hard-coded).
#
# Returns the qualifying rows of `df2` (zero rows when nobody in `df2` was
# drafted at `pick_number`).
#
# `df_bottom_players` and `df2` are required: the former self-referential
# defaults (`df2 = df2`) could never be evaluated and always raised an error.
is_bust <- function(pick_number, df_bottom_players, df2,
                    current_year = 2023) {
  stopifnot(
    "`df2` is missing required columns" = all(
      c(
        "pick_overall", "pra_per_g", "ast_per_g_nba", "trb_per_g_nba",
        "pts_per_g_nba", "vorp_per_g", "seasons", "year"
      ) %in% names(df2)
    ),
    "`df_bottom_players` is missing required columns" = all(
      c("pick_overall", "pra_per_g", "ast_per_g_nba", "pts_per_g_nba") %in%
        names(df_bottom_players)
    )
  )

  # Match the draft slot by value. Indexing the cutoffs by position picks the
  # wrong row (or fails) as soon as one draft slot is absent from the table.
  at_pick <- function(data) {
    hit <- as.character(data$pick_overall) == as.character(pick_number)
    data[which(hit), , drop = FALSE]
  }

  drafted <- at_pick(df2)
  if (nrow(drafted) == 0L) {
    return(drafted)
  }

  cutoffs <- at_pick(df_bottom_players)
  if (nrow(cutoffs) != 1L) {
    stop(
      "`df_bottom_players` must contain exactly one row for pick ",
      pick_number,
      call. = FALSE
    )
  }

  # Rebounds use the 40th percentile of this draft slot, a higher percentile
  # than the one supplied for the other stats; VORP uses the slot's median.
  rebound_cutoff <- quantile(
    drafted$trb_per_g_nba,
    probs = 0.4, na.rm = TRUE, names = FALSE
  )
  vorp_cutoff <- median(drafted$vorp_per_g, na.rm = TRUE)

  below_every_cutoff <- drafted$pra_per_g < cutoffs$pra_per_g &
    drafted$ast_per_g_nba < cutoffs$ast_per_g_nba &
    drafted$trb_per_g_nba < rebound_cutoff &
    drafted$pts_per_g_nba < cutoffs$pts_per_g_nba &
    drafted$vorp_per_g < vorp_cutoff
  # Playing less than half the seasons since being drafted makes you a bust.
  rarely_played <- drafted$seasons < 1 / 2 * (current_year - drafted$year)

  # which() drops rows whose criteria are NA, like dplyr::filter() does.
  drafted[which(below_every_cutoff | rarely_played), , drop = FALSE]
}
# First overall pick: players meeting the non-bust criteria.
df_pick_1 <- is_not_bust(
  pick_number = 1,
  df_top_players = df_top_players,
  df2 = df2
)

# First overall pick: players meeting the bust criteria.
df_pick_1_bust <- is_bust(
  pick_number = 1,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_1
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 John Wall              33            36 91.7%         116          182 63.7%  
## 2 Kyrie Irving            0             0 0%             26           39 66.7%  
## 3 Anthony Davis          96            98 98.0%         152          174 87.4%  
## 4 Karl-Anthony T…        22            24 91.7%          87          121 71.9%  
## 5 Ben Simmons            56            61 91.8%         159          220 72.3%  
## 6 Zion Williamson        72            79 91.1%         247          313 78.9%  
## 7 Anthony Edwards        27            27 100.0%         89          129 69.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the first-overall picks returned by is_bust().
df_pick_1_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Anthony Bennett        53            58 91.4%         100          140 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Second overall pick: non-busts and busts; the non-busts are shown first.
df_pick_2 <- is_not_bust(
  pick_number = 2,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_2_bust <- is_bust(
  pick_number = 2,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_2
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 D'Angelo Russe…         4             4 100.0%         70          110 63.6%  
## 2 Brandon Ingram         17            17 100.0%         69          117 59.0%  
## 3 Lonzo Ball             37            40 92.5%          94          120 78.3%  
## 4 Ja Morant              28            31 90.3%         160          264 60.6%  
## 5 Chet Holmgren          57            57 100.0%        105          125 84.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the second-overall picks returned by is_bust().
df_pick_2_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Derrick Willia…        56            60 93.3%         135          188 71.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Third overall pick: non-busts and busts; the non-busts are shown first.
df_pick_3 <- is_not_bust(
  pick_number = 3,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_3_bust <- is_bust(
  pick_number = 3,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_3
## # A tibble: 4 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Bradley Beal        18            20 90.0%          89          137 65.0%  
## 2 Joel Embiid         30            30 100.0%         80           99 80.8%  
## 3 Jayson Tatum        18            21 85.7%          79          126 62.7%  
## 4 Evan Mobley         63            66 95.5%         113          144 78.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the third-overall picks returned by is_bust().
df_pick_3_bust
## # A tibble: 1 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Jahlil Okafor        64            67 95.5%         213          270 78.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Fourth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_4 <- is_not_bust(
  pick_number = 4,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_4_bust <- is_bust(
  pick_number = 4,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_4
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Aaron Gordon           54            56 96.4%         137          198 69.2%  
## 2 Jaren Jackson …        31            31 100.0%         61           93 65.6%  
## 3 Scottie Barnes         19            21 90.5%          61           89 68.5%  
## 4 Keegan Murray          63            67 94.0%         196          277 70.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the fourth-overall picks returned by is_bust().
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Fifth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_5 <- is_not_bust(
  pick_number = 5,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_5_bust <- is_bust(
  pick_number = 5,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_5
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 DeMarcus Cousi…        53            57 93.0%         144          189 76.2%  
## 2 De'Aaron Fox           20            21 95.2%         131          203 64.5%  
## 3 Trae Young              0             0 0%            105          201 52.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the fifth-overall picks returned by is_bust().
df_pick_5_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Thomas Robinson        70            83 84.3%         169          262 64.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Sixth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_6 <- is_not_bust(
  pick_number = 6,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_6_bust <- is_bust(
  pick_number = 6,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_6
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Damian Lillard        13            17 76.5%          98          169 58.0%  
## 2 Nerlens Noel          48            50 96.0%          76           99 76.8%  
## 3 Marcus Smart          16            18 88.9%          78          110 70.9%  
## 4 Buddy Hield           18            22 81.8%         119          178 66.9%  
## 5 Onyeka Okongwu        58            61 95.1%         135          186 72.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the sixth-overall picks returned by is_bust().
df_pick_6_bust
## # A tibble: 1 × 55
##   player    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>         <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ekpe Udoh        30            32 93.8%          78          109 71.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Seventh overall pick: non-busts and busts; the non-busts are shown first.
df_pick_7 <- is_not_bust(
  pick_number = 7,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_7_bust <- is_bust(
  pick_number = 7,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_7
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Julius Randle          37            40 92.5%         132          197 67.0%  
## 2 Jamal Murray           18            19 94.7%          77          111 69.4%  
## 3 Lauri Markkanen        20            24 83.3%          65          100 65.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the seventh-overall picks returned by is_bust().
df_pick_7_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ben McLemore        44            45 97.8%          90          126 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Eighth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_8 <- is_not_bust(
  pick_number = 8,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_8_bust <- is_bust(
  pick_number = 8,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_8
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Al-Farouq Aminu        46            48 95.8%         112          173 64.7%  
## 2 Kentavious Cal…        15            16 93.8%          63           94 67.0%  
## 3 Franz Wagner           11            11 100.0%         63           93 67.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the eighth-overall picks returned by is_bust().
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Ninth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_9 <- is_not_bust(
  pick_number = 9,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_9_bust <- is_bust(
  pick_number = 9,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_9
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Gordon Hayward        19            20 95.0%          89          128 69.5%  
## 2 Kemba Walker           3             3 100.0%        115          196 58.7%  
## 3 Andre Drummond        80            89 89.9%         130          185 70.3%  
## 4 Trey Burke             9             9 100.0%         67          105 63.8%  
## 5 Jakob Poeltl          32            34 94.1%         199          284 70.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the ninth-overall picks returned by is_bust().
df_pick_9_bust
## # A tibble: 1 × 55
##   player     dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>          <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kevin Knox        18            20 90.0%          65           99 65.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Tenth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_10 <- is_not_bust(
  pick_number = 10,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_10_bust <- is_bust(
  pick_number = 10,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_10
## # A tibble: 5 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Paul George          18            22 81.8%          70          106 66.0%  
## 2 CJ McCollum           3             3 100.0%         34           63 54.0%  
## 3 Elfrid Payton        21            24 87.5%         169          247 68.4%  
## 4 Mikal Bridges        35            42 83.3%         109          161 67.7%  
## 5 Jalen Smith          49            52 94.2%         114          158 72.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the tenth-overall picks returned by is_bust().
df_pick_10_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ziaire Williams        10            11 90.9%          26           49 53.1%  
## 2 Johnny Davis           16            19 84.2%          89          143 62.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Eleventh overall pick: non-busts and busts; the non-busts are shown first.
df_pick_11 <- is_not_bust(
  pick_number = 11,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_11_bust <- is_bust(
  pick_number = 11,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_11
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Klay Thompson           8             8 100.0%         66          110 60.0%  
## 2 Myles Turner           11            13 84.6%          40           54 74.1%  
## 3 Domantas Sabon…        22            24 91.7%         157          214 73.4%  
## 4 Shai Gilgeous-…        11            11 100.0%        108          182 59.3%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the eleventh-overall picks returned by is_bust().
df_pick_11_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 James Bouknight        12            12 100.0%         52           79 65.8%  
## 2 Jett Howard             6             6 100.0%         29           47 61.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Twelfth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_12 <- is_not_bust(
  pick_number = 12,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_12_bust <- is_bust(
  pick_number = 12,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_12
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Steven Adams           29            33 87.9%          85          129 65.9%  
## 2 Miles Bridges          30            35 85.7%          84          128 65.6%  
## 3 Tyrese Halibur…         7             8 87.5%          46           62 74.2%  
## 4 Jalen Williams         25            27 92.6%         124          186 66.7%  
## 5 Dereck Lively …        54            55 98.2%          74           96 77.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Show the twelfth-overall picks returned by is_bust().
df_pick_12_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Xavier Henry        17            17 100.0%         60           90 66.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Thirteenth overall pick: non-busts and busts; the non-busts are shown first.
df_pick_13 <- is_not_bust(
  pick_number = 13,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_13_bust <- is_bust(
  pick_number = 13,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_13
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ed Davis               26            27 96.3%          42           50 84.0%  
## 2 Kelly Olynyk           25            28 89.3%         152          212 71.7%  
## 3 Zach LaVine            21            25 84.0%          51           90 56.7%  
## 4 Devin Booker            8             9 88.9%          42           59 71.2%  
## 5 Donovan Mitche…         9            13 69.2%          64          116 55.2%  
## 6 Tyler Herro             4             5 80.0%          56           84 66.7%  
## 7 Jalen Duren            70            76 92.1%         111          152 73.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Print the is_bust() result for pick 13.
df_pick_13_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kendall Marsha…         0             0 0%             35           53 66.0%  
## 2 Jerome Robinson        12            13 92.3%          98          157 62.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Fourteenth overall pick.
# is_not_bust() and is_bust() are helpers defined outside this section; they
# are called with the pick number, a reference table (df_top_players or
# df_bottom_players) and df2. Presumably each returns the df2 rows for that
# pick that also appear in the reference table -- TODO confirm in the helpers.
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)

# Print the is_not_bust() result for pick 14.
df_pick_14
## # A tibble: 4 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Marcus Morris        31            33 93.9%         114          147 77.6%  
## 2 T.J. Warren          37            37 100.0%        192          251 76.5%  
## 3 Cameron Payne         3             3 100.0%         53           87 60.9%  
## 4 Bam Adebayo          99           105 94.3%         138          185 74.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Print the is_bust() result for pick 14.
df_pick_14_bust
## # A tibble: 1 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Stack the per-pick tables for picks 1 through 14 into one table per group.

# All is_not_bust() results, in pick order.
df_good <- list(
  df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5,
  df_pick_6, df_pick_7, df_pick_8, df_pick_9, df_pick_10,
  df_pick_11, df_pick_12, df_pick_13, df_pick_14
) |>
  bind_rows()

# All is_bust() results, in pick order.
df_busts <- list(
  df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust,
  df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust,
  df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust,
  df_pick_13_bust, df_pick_14_bust
) |>
  bind_rows()

# Show the first 20 rows of the combined "good" table.
print(df_good, n = 20)
## # A tibble: 64 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 John Wall             33            36 91.7%         116          182 63.7%  
##  2 Kyrie Irving           0             0 0%             26           39 66.7%  
##  3 Anthony Davis         96            98 98.0%         152          174 87.4%  
##  4 Karl-Anthony …        22            24 91.7%          87          121 71.9%  
##  5 Ben Simmons           56            61 91.8%         159          220 72.3%  
##  6 Zion Williams…        72            79 91.1%         247          313 78.9%  
##  7 Anthony Edwar…        27            27 100.0%         89          129 69.0%  
##  8 D'Angelo Russ…         4             4 100.0%         70          110 63.6%  
##  9 Brandon Ingram        17            17 100.0%         69          117 59.0%  
## 10 Lonzo Ball            37            40 92.5%          94          120 78.3%  
## 11 Ja Morant             28            31 90.3%         160          264 60.6%  
## 12 Chet Holmgren         57            57 100.0%        105          125 84.0%  
## 13 Bradley Beal          18            20 90.0%          89          137 65.0%  
## 14 Joel Embiid           30            30 100.0%         80           99 80.8%  
## 15 Jayson Tatum          18            21 85.7%          79          126 62.7%  
## 16 Evan Mobley           63            66 95.5%         113          144 78.5%  
## 17 Aaron Gordon          54            56 96.4%         137          198 69.2%  
## 18 Jaren Jackson…        31            31 100.0%         61           93 65.6%  
## 19 Scottie Barnes        19            21 90.5%          61           89 68.5%  
## 20 Keegan Murray         63            67 94.0%         196          277 70.8%  
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
# Full list of player names in the combined "good" table.
pull(df_good, player)
##  [1] "John Wall"                "Kyrie Irving"            
##  [3] "Anthony Davis"            "Karl-Anthony Towns"      
##  [5] "Ben Simmons"              "Zion Williamson"         
##  [7] "Anthony Edwards"          "D'Angelo Russell"        
##  [9] "Brandon Ingram"           "Lonzo Ball"              
## [11] "Ja Morant"                "Chet Holmgren"           
## [13] "Bradley Beal"             "Joel Embiid"             
## [15] "Jayson Tatum"             "Evan Mobley"             
## [17] "Aaron Gordon"             "Jaren Jackson Jr."       
## [19] "Scottie Barnes"           "Keegan Murray"           
## [21] "DeMarcus Cousins"         "De'Aaron Fox"            
## [23] "Trae Young"               "Damian Lillard"          
## [25] "Nerlens Noel"             "Marcus Smart"            
## [27] "Buddy Hield"              "Onyeka Okongwu"          
## [29] "Julius Randle"            "Jamal Murray"            
## [31] "Lauri Markkanen"          "Al-Farouq Aminu"         
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"            
## [35] "Gordon Hayward"           "Kemba Walker"            
## [37] "Andre Drummond"           "Trey Burke"              
## [39] "Jakob Poeltl"             "Paul George"             
## [41] "CJ McCollum"              "Elfrid Payton"           
## [43] "Mikal Bridges"            "Jalen Smith"             
## [45] "Klay Thompson"            "Myles Turner"            
## [47] "Domantas Sabonis"         "Shai Gilgeous-Alexander" 
## [49] "Steven Adams"             "Miles Bridges"           
## [51] "Tyrese Haliburton"        "Jalen Williams"          
## [53] "Dereck Lively II"         "Ed Davis"                
## [55] "Kelly Olynyk"             "Zach LaVine"             
## [57] "Devin Booker"             "Donovan Mitchell"        
## [59] "Tyler Herro"              "Jalen Duren"             
## [61] "Marcus Morris"            "T.J. Warren"             
## [63] "Cameron Payne"            "Bam Adebayo"
# Print the combined bust table.
df_busts
## # A tibble: 15 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 Anthony Benne…        53            58 91.4%         100          140 71.4%  
##  2 Derrick Willi…        56            60 93.3%         135          188 71.8%  
##  3 Jahlil Okafor         64            67 95.5%         213          270 78.9%  
##  4 Thomas Robins…        70            83 84.3%         169          262 64.5%  
##  5 Ekpe Udoh             30            32 93.8%          78          109 71.6%  
##  6 Ben McLemore          44            45 97.8%          90          126 71.4%  
##  7 Kevin Knox            18            20 90.0%          65           99 65.7%  
##  8 Ziaire Willia…        10            11 90.9%          26           49 53.1%  
##  9 Johnny Davis          16            19 84.2%          89          143 62.2%  
## 10 James Bouknig…        12            12 100.0%         52           79 65.8%  
## 11 Jett Howard            6             6 100.0%         29           47 61.7%  
## 12 Xavier Henry          17            17 100.0%         60           90 66.7%  
## 13 Kendall Marsh…         0             0 0%             35           53 66.0%  
## 14 Jerome Robins…        12            13 92.3%          98          157 62.4%  
## 15 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Full list of player names in the combined bust table.
pull(df_busts, player)
##  [1] "Anthony Bennett"  "Derrick Williams" "Jahlil Okafor"    "Thomas Robinson" 
##  [5] "Ekpe Udoh"        "Ben McLemore"     "Kevin Knox"       "Ziaire Williams" 
##  [9] "Johnny Davis"     "James Bouknight"  "Jett Howard"      "Xavier Henry"    
## [13] "Kendall Marshall" "Jerome Robinson"  "Romeo Langford"
# Scatter plots of 2PT makes per game (x) against 3PT makes per game (y).

# Good picks: green points, each labelled with the player name.
plot_good <- ggplot(df_good, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(color = "green", size = 4, alpha = 0.5) +
  geom_label_repel(aes(label = player), size = 1.5, max.overlaps = 20) +
  labs(
    title = "CBB Shot Selection for Good NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()

# Busts: red points, each labelled with the player name.
plot_busts <- ggplot(df_busts, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(color = "red", size = 4, alpha = 0.5) +
  geom_label_repel(aes(label = player), size = 3) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Busts",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()

# Combined view: every row of df2 as a faint grey layer, with the bust and
# good tables drawn on top. The colour is mapped to a constant string per
# layer so that scale_color_manual() produces a legend.
plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
  geom_point(aes(color = "Bad value"), data = df_busts, size = 4, alpha = 0.5) +
  geom_point(aes(color = "Good value"), data = df_good, size = 4, alpha = 0.5) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  scale_color_manual(
    values = c(
      "Bad value" = "red",
      "Good value" = "green",
      "Average value" = "grey"
    )
  ) +
  theme_bw()

plot_combined

plot_busts

plot_good

Principal Component Analysis of college stats, for grouping/covariance

library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Preparing Data

# List the columns available in df2 before selecting the college stats.
names(df2)
##  [1] "player"            "dunk_made"         "dunk_attempts"    
##  [4] "dunk_pct"          "rim_made"          "rim_attempts"     
##  [7] "rim_pct"           "rim_asted"         "other2pt_made"    
## [10] "other2pt_attempts" "other2pt_pct"      "other2pt_asted"   
## [13] "3pt_tot"           "3pt_pct"           "3pt_asted"        
## [16] "games"             "mp_per_g_college"  "fg_per_g"         
## [19] "fga_per_g"         "fg_pct_college"    "fg2_per_g"        
## [22] "fg2a_per_g"        "fg2_pct"           "fg3_per_g"        
## [25] "fg3a_per_g"        "fg3_pct_college"   "ft_per_g"         
## [28] "fta_per_g"         "ft_pct_college"    "orb_per_g"        
## [31] "drb_per_g"         "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [37] "pts_per_g_college" "pick_overall"      "college_name"     
## [40] "seasons"           "g"                 "fg_pct_nba"       
## [43] "fg3_pct_nba"       "ft_pct_nba"        "mp_per_g_nba"     
## [46] "pts_per_g_nba"     "trb_per_g_nba"     "ast_per_g_nba"    
## [49] "ws"                "ws_per_48"         "bpm"              
## [52] "vorp"              "year"              "pra_per_g"        
## [55] "vorp_per_g"
# Move player names into row names so every remaining column is a statistic.
df3 <- df2 |> column_to_rownames(var = "player")

# Build the college-stats table used for the PCA:
#   1. keep the shot-zone, shooting and box-score columns, renaming
#      `3pt_asted` to fg3_asted on the way;
#   2. parse the percentage columns stored as text (e.g. "91.7%") and divide
#      by 100 to get proportions;
#   3. add a 3PT percentage computed from per-game makes and attempts, with
#      missing ratios replaced by 0 (presumably players with no 3PT attempts
#      -- TODO confirm), and place it right after fg3_asted.
df_cbb <- df3 |>
  select(
    dunk_made, dunk_attempts, dunk_pct,
    rim_made, rim_attempts, rim_pct, rim_asted,
    other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted,
    fg2_pct, fg3_per_g, fg3a_per_g,
    fg3_asted = `3pt_asted`,
    games, ft_per_g, fta_per_g, ast_per_g_college, orb_per_g, drb_per_g,
    stl_per_g, blk_per_g, tov_per_g, pts_per_g_college
  ) |>
  mutate(
    across(
      c(dunk_pct, rim_pct, rim_asted, other2pt_pct, other2pt_asted, fg3_asted),
      function(pct_text) parse_number(pct_text) / 100
    ),
    fg3_pct_per_g = coalesce(fg3_per_g / fg3a_per_g, 0)
  ) |>
  relocate(fg3_pct_per_g, .after = fg3_asted)

# Convert totals into per-game values.
#   x:     numeric vector of totals
#   games: numeric vector of games played (recycled against x by R's rules)
# Returns x / games, element-wise.
to_per_game <- function(x, games) {
  x / games
}

# Divide the shot-zone makes/attempts columns by each player's games column
# so they become per-game values.
df_cbb <- mutate(
  df_cbb,
  across(
    c(
      dunk_made, dunk_attempts,
      rim_made, rim_attempts,
      other2pt_made, other2pt_attempts
    ),
    function(total) to_per_game(total, games)
  )
)

# Count missing values per column.
df_cbb |>
  is.na() |>
  colSums()
##         dunk_made     dunk_attempts          dunk_pct          rim_made 
##                 0                 0                 0                 0 
##      rim_attempts           rim_pct         rim_asted     other2pt_made 
##                 0                 0                 0                 0 
## other2pt_attempts      other2pt_pct    other2pt_asted           fg2_pct 
##                 0                 0                 0                 0 
##         fg3_per_g        fg3a_per_g         fg3_asted     fg3_pct_per_g 
##                 0                 0                 0                 0 
##             games          ft_per_g         fta_per_g ast_per_g_college 
##                 0                 0                 0                 0 
##         orb_per_g         drb_per_g         stl_per_g         blk_per_g 
##                 0                 0                 0                 0 
##         tov_per_g pts_per_g_college 
##                 0                 0

Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r

# Standardise every stat except games (scale() centers and scales each
# column), then convert the resulting matrix to a tibble. The tibble does not
# carry the player row names.
df_cbb_scaled <- df_cbb |>
  select(-games) |>
  scale() |>
  as_tibble()
df_cbb_scaled
## # A tibble: 165 × 25
##    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
##        <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>     <dbl>
##  1     0.360        0.381    0.147    0.898        1.16    -0.569   -0.927 
##  2    -0.777       -0.801    0.334   -0.908       -1.09     1.49    -1.46  
##  3     1.28         1.26     0.328    1.47         1.12     1.46     0.671 
##  4     0.623        0.680    0.0540   0.492        0.395    0.600    0.742 
##  5     1.16         1.17     0.228    1.45         1.19     1.17     0.642 
##  6     0.267        0.254    0.278    0.0749      -0.0119   0.531   -0.0603
##  7    -0.546       -0.575    0.384    0.247        0.380   -0.430    0.0517
##  8     0.121        0.0815   0.403   -0.231       -0.159   -0.430    0.0753
##  9    -0.622       -0.648    0.353   -0.670       -0.737    0.238    0.624 
## 10    -0.606       -0.559   -0.469   -0.822       -0.844   -0.249   -0.615 
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## #   fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## #   stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)

# Correlation matrix of the standardised college stats, shown as a full
# heatmap and as a hierarchically ordered lower triangle.
corr_matrix <- cor(df_cbb_scaled)
ggcorrplot(corr_matrix, method = "square")

ggcorrplot(
  corr_matrix,
  method = "square",
  hc.order = TRUE,
  type = "lower"
)

# Run the PCA on the standardised observations. Passing corr_matrix as the
# first argument of princomp() would treat the correlation matrix itself as
# a data set with one "observation" per variable, which does not decompose
# the player data and gives variance proportions that disagree with the
# FactoMineR::PCA() run later in this file.
# NOTE: any echoed output below this chunk was produced by the earlier call
# and has to be regenerated by re-knitting.
pca <- princomp(df_cbb_scaled)
summary(pca)
## Importance of components:
##                           Comp.1    Comp.2     Comp.3     Comp.4    Comp.5
## Standard deviation     1.5264968 0.8275209 0.38297252 0.26189097 0.2392287
## Proportion of Variance 0.6731171 0.1978139 0.04236762 0.01981253 0.0165320
## Cumulative Proportion  0.6731171 0.8709310 0.91329860 0.93311112 0.9496431
##                           Comp.6     Comp.7      Comp.8      Comp.9     Comp.10
## Standard deviation     0.2338592 0.19672875 0.166119165 0.136620686 0.092515541
## Proportion of Variance 0.0157982 0.01117981 0.007971468 0.005391775 0.002472454
## Cumulative Proportion  0.9654413 0.97662114 0.984592603 0.989984378 0.992456832
##                            Comp.11     Comp.12     Comp.13      Comp.14
## Standard deviation     0.087727419 0.077813416 0.063038666 0.0522112215
## Proportion of Variance 0.002223154 0.001749072 0.001147923 0.0007874564
## Cumulative Proportion  0.994679986 0.996429058 0.997576981 0.9983644376
##                             Comp.15      Comp.16      Comp.17      Comp.18
## Standard deviation     0.0466045703 0.0419416771 0.0315945772 0.0227293361
## Proportion of Variance 0.0006274163 0.0005081482 0.0002883526 0.0001492356
## Cumulative Proportion  0.9989918540 0.9995000022 0.9997883548 0.9999375904
##                             Comp.19      Comp.20      Comp.21      Comp.22
## Standard deviation     1.371363e-02 4.393920e-03 2.341039e-03 1.590896e-03
## Proportion of Variance 5.432546e-05 5.577032e-06 1.583129e-06 7.311092e-07
## Cumulative Proportion  9.999919e-01 9.999975e-01 9.999991e-01 9.999998e-01
##                             Comp.23      Comp.24      Comp.25
## Standard deviation     7.221375e-04 3.825110e-04 7.167572e-09
## Proportion of Variance 1.506394e-07 4.226557e-08 1.484031e-17
## Cumulative Proportion  1.000000e+00 1.000000e+00 1.000000e+00
# Scree plot of the princomp result.
fviz_eig(
  pca,
  addlabels = TRUE,
  title = "Principal Components Scree Plot"
)

# Variable plot on components 1 and 2, coloured by cos2.
fviz_pca_var(
  pca,
  col.var = "cos2",
  repel = TRUE,
  title = "Contributions of Variables to Components 1 and 2",
  gradient.cols = c("lightblue", "black")
)

# cos2 of each variable over components 1 and 2.
fviz_cos2(pca, choice = "var", axes = 1:2)

Guides: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/

Kmeans Clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f

# Draw all rows of df_cbb without replacement, i.e. a random reordering of
# the table.
# NOTE(review): no set.seed() is visible in this section, so the row order
# may differ between runs -- confirm whether a seed is set earlier.
df_cbb.sample <- sample_frac(df_cbb, 1, replace = FALSE)
head(df_cbb.sample)
##                  dunk_made dunk_attempts dunk_pct  rim_made rim_attempts
## Myles Turner    0.32352941    0.38235294    0.846 1.1764706     1.588235
## Jimmer Fredette 0.01438849    0.02158273    0.667 0.7841727     1.086331
## Collin Sexton   0.15151515    0.24242424    0.625 3.1212121     5.515152
## John Henson     0.50458716    0.52293578    0.965 0.7614679     1.055046
## Kelly Olynyk    0.24752475    0.27722772    0.893 1.5049505     2.099010
## Trey Lyles      0.77777778    0.83333333    0.933 1.9166667     2.444444
##                 rim_pct rim_asted other2pt_made other2pt_attempts other2pt_pct
## Myles Turner      0.741     0.650     1.7647059         4.1470588        0.426
## Jimmer Fredette   0.722     0.294     0.8057554         2.1510791        0.375
## Collin Sexton     0.566     0.233     1.4848485         3.7575758        0.395
## John Henson       0.722     0.759     1.1192661         2.7064220        0.414
## Kelly Olynyk      0.717     0.553     0.4752475         0.9009901        0.527
## Trey Lyles        0.784     0.565     1.2222222         3.4166667        0.358
##                 other2pt_asted fg2_pct fg3_per_g fg3a_per_g fg3_asted
## Myles Turner             0.633   0.513       0.5        1.8     0.765
## Jimmer Fredette          0.089   0.497       2.1        5.4     0.455
## Collin Sexton            0.061   0.495       1.3        4.0     0.318
## John Henson              0.697   0.505       0.0        0.2     0.000
## Kelly Olynyk             0.417   0.634       0.2        0.7     0.889
## Trey Lyles               0.614   0.536       0.1        0.8     0.750
##                 fg3_pct_per_g games ft_per_g fta_per_g ast_per_g_college
## Myles Turner        0.2777778    34      2.8       3.3               0.6
## Jimmer Fredette     0.3888889   139      4.5       5.1               3.7
## Collin Sexton       0.3250000    33      5.9       7.6               3.6
## John Henson         0.0000000   109      1.6       3.2               1.0
## Kelly Olynyk        0.2857143   101      2.0       2.8               1.1
## Trey Lyles          0.1250000    36      2.1       2.8               1.1
##                 orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
## Myles Turner          1.3       5.2       0.3       2.6       1.4
## Jimmer Fredette       0.5       2.1       1.2       0.1       2.5
## Collin Sexton         1.0       2.7       0.8       0.1       2.8
## John Henson           2.5       5.7       0.6       2.5       1.5
## Kelly Olynyk          1.5       3.1       0.5       0.5       1.4
## Trey Lyles            1.8       3.4       0.5       0.4       1.1
##                 pts_per_g_college
## Myles Turner                 10.1
## Jimmer Fredette              18.7
## Collin Sexton                19.2
## John Henson                  10.3
## Kelly Olynyk                  8.9
## Trey Lyles                    8.7
# PCA with FactoMineR on every stat except games; scale.unit = TRUE
# standardises the variables, and graph = FALSE suppresses the default plots.
df_cbb.pca <- PCA(
  select(df_cbb.sample, -games),
  scale.unit = TRUE,
  graph = FALSE
)

# Scree plot.
fviz_eig(df_cbb.pca, addlabels = TRUE)

# Variable plot coloured by cos2.
fviz_pca_var(
  df_cbb.pca,
  col.var = "cos2",
  gradient.cols = c("lightblue", "black"),
  repel = TRUE
)

# Variable-level PCA results (this name shadows stats::var while it exists).
var <- get_pca_var(df_cbb.pca)

# cos2 of each variable over dimensions 1 and 2.
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2)

# Individuals (players) plot with small repelled labels.
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1)

summary(df_cbb.pca)
## 
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               7.910   5.334   2.090   1.463   1.278   1.196   1.058
## % of var.             31.642  21.336   8.359   5.852   5.113   4.784   4.231
## Cumulative % of var.  31.642  52.978  61.336  67.189  72.302  77.085  81.316
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.980   0.728   0.551   0.447   0.419   0.332   0.269
## % of var.              3.922   2.911   2.203   1.787   1.677   1.329   1.075
## Cumulative % of var.  85.238  88.149  90.352  92.140  93.817  95.146  96.221
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.254   0.210   0.164   0.138   0.072   0.062   0.022
## % of var.              1.017   0.840   0.655   0.553   0.286   0.247   0.088
## Cumulative % of var.  97.238  98.079  98.734  99.287  99.574  99.821  99.909
##                       Dim.22  Dim.23  Dim.24  Dim.25
## Variance               0.012   0.006   0.004   0.002
## % of var.              0.046   0.023   0.014   0.007
## Cumulative % of var.  99.955  99.978  99.993 100.000
## 
## Individuals (the 10 first)
##                       Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## Myles Turner      |  4.749 |  1.929  0.285  0.165 | -1.917  0.418  0.163 |
## Jimmer Fredette   |  4.809 | -3.867  1.146  0.647 |  0.263  0.008  0.003 |
## Collin Sexton     |  5.851 | -2.729  0.570  0.217 |  3.754  1.601  0.412 |
## John Henson       |  6.154 |  3.308  0.838  0.289 | -2.665  0.807  0.188 |
## Kelly Olynyk      |  4.781 |  1.070  0.088  0.050 | -3.551  1.433  0.552 |
## Trey Lyles        |  4.437 |  2.347  0.422  0.280 | -2.411  0.660  0.295 |
## Anthony Black     |  3.631 | -1.285  0.126  0.125 |  1.571  0.280  0.187 |
## Thomas Robinson   |  5.982 |  0.703  0.038  0.014 | -2.124  0.512  0.126 |
## Kris Dunn         |  4.707 | -2.539  0.494  0.291 |  0.121  0.002  0.001 |
## Otto Porter Jr.   |  3.375 |  0.208  0.003  0.004 | -1.729  0.340  0.263 |
##                    Dim.3    ctr   cos2  
## Myles Turner       2.091  1.268  0.194 |
## Jimmer Fredette   -0.758  0.167  0.025 |
## Collin Sexton     -0.753  0.165  0.017 |
## John Henson       -0.650  0.123  0.011 |
## Kelly Olynyk       0.137  0.005  0.001 |
## Trey Lyles         0.456  0.060  0.011 |
## Anthony Black     -2.131  1.317  0.345 |
## Thomas Robinson    1.739  0.877  0.085 |
## Kris Dunn         -2.822  2.310  0.359 |
## Otto Porter Jr.    0.974  0.275  0.083 |
## 
## Variables (the 10 first)
##                      Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## dunk_made         |  0.834  8.788  0.695 |  0.336  2.113  0.113 | -0.023  0.025
## dunk_attempts     |  0.828  8.662  0.685 |  0.337  2.132  0.114 | -0.023  0.025
## dunk_pct          |  0.343  1.489  0.118 | -0.070  0.091  0.005 |  0.214  2.193
## rim_made          |  0.585  4.330  0.343 |  0.673  8.503  0.454 | -0.093  0.416
## rim_attempts      |  0.453  2.598  0.205 |  0.740 10.256  0.547 | -0.097  0.448
## rim_pct           |  0.728  6.706  0.530 | -0.141  0.373  0.020 | -0.002  0.000
## rim_asted         |  0.768  7.456  0.590 | -0.352  2.327  0.124 |  0.180  1.558
## other2pt_made     |  0.109  0.149  0.012 |  0.603  6.823  0.364 |  0.563 15.195
## other2pt_attempts |  0.102  0.132  0.010 |  0.651  7.952  0.424 |  0.490 11.501
## other2pt_pct      |  0.015  0.003  0.000 | -0.039  0.029  0.002 |  0.358  6.122
##                     cos2  
## dunk_made          0.001 |
## dunk_attempts      0.001 |
## dunk_pct           0.046 |
## rim_made           0.009 |
## rim_attempts       0.009 |
## rim_pct            0.000 |
## rim_asted          0.033 |
## other2pt_made      0.318 |
## other2pt_attempts  0.240 |
## other2pt_pct       0.128 |
# PCA with stats::prcomp() on the same variables as the other PCA runs in
# this file. games is dropped here as it is there: it is the denominator
# used for the per-game columns, and leaving it in adds a 26th variable.
# The argument is spelled scale. in full rather than relying on partial
# matching of `scale`.
# NOTE: any echoed output below this chunk was produced with games included
# and has to be regenerated by re-knitting.
pca2 <- prcomp(
  df_cbb.sample |> select(-games),
  center = TRUE,
  scale. = TRUE
)
summary(pca2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8233 2.3564 1.45379 1.28788 1.18415 1.09891 1.03423
## Proportion of Variance 0.3066 0.2135 0.08129 0.06379 0.05393 0.04645 0.04114
## Cumulative Proportion  0.3066 0.5201 0.60142 0.66521 0.71914 0.76559 0.80673
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.99030 0.85552 0.75400 0.67049 0.65151 0.64104 0.57399
## Proportion of Variance 0.03772 0.02815 0.02187 0.01729 0.01633 0.01581 0.01267
## Cumulative Proportion  0.84445 0.87260 0.89447 0.91176 0.92808 0.94389 0.95656
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.51590 0.49111 0.40960 0.40167 0.37014 0.26727 0.20134
## Proportion of Variance 0.01024 0.00928 0.00645 0.00621 0.00527 0.00275 0.00156
## Cumulative Proportion  0.96680 0.97607 0.98253 0.98873 0.99400 0.99675 0.99831
##                           PC22    PC23    PC24    PC25    PC26
## Standard deviation     0.14559 0.10740 0.07652 0.06000 0.04282
## Proportion of Variance 0.00082 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion  0.99912 0.99957 0.99979 0.99993 1.00000
# Keep the scores on the first two principal components, with the sign
# flipped, as the input table for clustering. The sign of a principal
# component is arbitrary; the flip presumably follows the linked guide --
# TODO confirm.
df_cluster <- as.data.frame(-pca2$x[, c("PC1", "PC2")])
df_cluster
##                                  PC1          PC2
## Myles Turner             -1.91803535 -1.800563503
## Jimmer Fredette           4.10068583 -0.151455721
## Collin Sexton             2.49870233  3.953433538
## John Henson              -3.00626308 -3.180396627
## Kelly Olynyk             -0.78055387 -3.858127881
## Trey Lyles               -2.31079038 -2.311364621
## Anthony Black             1.15287196  1.737863300
## Thomas Robinson          -0.46560196 -2.442928178
## Kris Dunn                 2.64831964 -0.054392871
## Otto Porter Jr.          -0.11050188 -1.773914232
## Shabazz Muhammad         -0.41381159  2.159011494
## Tyrese Haliburton         2.20631834 -2.697991218
## Deandre Ayton            -6.42063201  3.989709140
## Meyers Leonard           -2.79244626 -3.646226533
## Cade Cunningham           2.85426045  4.449564443
## Kentavious Caldwell-Pope  2.20137247 -0.823482753
## Paul George               2.04066776 -0.155321706
## Marvin Bagley III        -5.74422617  4.591555606
## Cameron Payne             4.43284878  1.397687085
## Kendall Marshall          4.50594244 -1.991570055
## Anthony Bennett          -3.16583380  1.008475683
## Jakob Poeltl             -3.09307807 -1.010869663
## Austin Rivers             2.94309828  1.408832101
## Jalen Suggs               1.51193273  1.416722416
## Taurean Prince            1.41666106 -3.713851811
## Trae Young                6.57771252  7.880998042
## De'Aaron Fox              0.82524429  3.344308266
## Victor Oladipo            0.59336620 -2.543388232
## Jarace Walker            -1.37801187 -1.216966913
## Onyeka Okongwu           -5.85709792  2.599723240
## Zion Williamson          -5.17455606  4.695617011
## DeMarcus Cousins         -4.47722765  2.532951574
## Andre Drummond           -6.01362530 -0.579810217
## Elfrid Payton             1.97980456  0.903624480
## Mo Bamba                 -5.19088002  0.667639378
## Donovan Mitchell          3.10251794 -1.824734500
## Alex Len                 -3.65666018 -2.221479147
## Kevin Knox                0.57857116  0.787030146
## Wendell Carter Jr.       -3.43851631  0.759548294
## Marquese Chriss          -2.85951816  0.454169608
## Davion Mitchell           3.46328985 -1.590099336
## De'Andre Hunter           0.73977851 -2.045387990
## Cameron Johnson           1.40190265 -2.112733416
## Ekpe Udoh                -2.95349083  2.108737125
## Al-Farouq Aminu          -1.06979323  0.683184259
## Anthony Davis            -6.81946695  1.298925467
## Aaron Gordon             -2.77173027  0.539925608
## Derrick Favors           -5.07612783  0.537442192
## Jaxson Hayes             -6.14378844 -1.935755954
## Evan Mobley              -4.34455690  2.691116412
## Jaren Jackson Jr.        -1.54989689 -1.654063788
## Cam Reddish               4.08549065 -0.100918560
## Jordan Hawkins            3.10079731 -2.711257786
## Stanley Johnson           1.12015696  1.170691584
## Nik Stauskas              3.10769654 -1.847553292
## Joshua Primo              1.51844668 -3.740079647
## Justise Winslow           0.32450165  0.046720801
## Doug McDermott            0.97886742 -1.321678910
## Julius Randle            -2.24340507  3.042945784
## Jaylen Brown              0.83240004  2.294511909
## Jaden Ivey                1.84082641 -0.056522486
## Brandon Knight            3.48956878  2.033951063
## Romeo Langford            0.69108021  2.226529738
## RJ Barrett                0.01227205  5.110094647
## Patrick Williams          0.07429544 -1.354172663
## Cole Aldrich             -2.80512022 -3.923034303
## Jonathan Isaac           -1.55579355 -0.429327692
## Josh Jackson             -1.57310640  3.071593787
## James Bouknight           1.50440966 -0.805096400
## Damian Lillard            4.26753179  0.405236246
## Kira Lewis Jr.            3.22512852  0.145447070
## Marcus Smart              2.54328344  2.015237976
## Michael Carter-Williams   2.86581601 -0.870790993
## Shai Gilgeous-Alexander   1.99930055  2.359093850
## Cason Wallace             2.27311780 -0.159975101
## Kemba Walker              3.16087458  0.642457328
## Anthony Edwards           1.36140263  2.673735875
## Taylor Hendricks         -1.22550512 -0.175647086
## Ochai Agbaji              1.61424758 -3.117399223
## Jalen Williams            1.77088011 -1.662712781
## Karl-Anthony Towns       -2.12840084 -1.109330782
## Xavier Henry              1.19039770 -0.836284356
## Lonzo Ball                0.20427619  0.082601232
## D'Angelo Russell          2.92106171  2.705140788
## Jeremy Sochan            -0.94012964 -1.962196272
## Harrison Barnes           1.07392023 -0.415393391
## Lauri Markkanen           0.13400874 -0.115337390
## Trey Burke                3.56814702  0.621441435
## T.J. Warren              -1.14870807  0.419904979
## Ja Morant                 2.44350644  3.154068779
## Buddy Hield               3.50692348 -1.416444649
## Scottie Barnes            0.34079523  0.018038969
## Greg Monroe              -0.61040396  0.650297864
## Cody Zeller              -2.04375199  0.560514715
## Patrick Patterson        -2.59675241 -2.071578251
## Joel Embiid              -4.56071760 -0.275049579
## Noah Vonleh              -1.16775866  0.188969620
## Dennis Smith Jr.          2.38981682  4.058571848
## Michael Kidd-Gilchrist   -1.54339794  0.537228608
## Bennedict Mathurin        1.03736333 -1.047637079
## Terrence Ross             1.12336118 -2.567881207
## Brandon Ingram            1.61026723  1.916191506
## Ed Davis                 -3.34988242 -3.186735688
## Jalen Smith              -2.16080003 -1.786709370
## P.J. Washington          -0.38430532 -1.057924680
## Jamal Murray              2.32101496  1.773731573
## Jett Howard               2.88030411 -1.519978771
## John Wall                 1.68160821  3.975501776
## Coby White                3.30501478  0.962609638
## Evan Turner               1.44633779  0.509352088
## Paolo Banchero           -0.78117820  2.804053329
## Dereck Lively II         -5.13940729 -4.908354365
## Devin Vassell             0.68178092 -4.075928100
## Keegan Murray            -1.65497692 -0.671771098
## Marcus Morris            -0.78619850 -2.586158338
## Derrick Williams         -1.06807992  1.073410109
## Markieff Morris          -1.34708852 -3.999247075
## Jerome Robinson           3.01145976 -0.204910540
## Jalen Duren              -6.04834658  0.567839906
## Aaron Nesmith             3.24624148 -1.628776546
## Gradey Dick               1.38567770 -1.245442843
## Ziaire Williams           1.73956057 -0.236909097
## Bradley Beal              0.82754947  0.659943331
## Jahlil Okafor            -5.53352532  2.780697880
## CJ McCollum               3.87520407  0.619831203
## Obi Toppin               -3.10013537 -0.850875977
## Frank Kaminsky            0.17667352 -4.085571358
## Kyrie Irving              3.69754992  2.860691262
## Zach Collins             -2.29766335 -2.206084028
## Jayson Tatum              0.72039348  2.664410902
## Jabari Smith Jr.          1.57911124  1.495506781
## Franz Wagner              1.19544539 -2.167513895
## Chris Duarte              1.86046277 -1.292904206
## Malik Monk                2.24035661  1.712986893
## Dion Waiters              1.87877434 -2.893422978
## Zach LaVine               1.55093364 -2.642329927
## Bam Adebayo              -6.03442637  1.151787352
## Mikal Bridges             1.14420389 -3.554794904
## Alec Burks                0.72391129  2.047902304
## Jarrett Culver            1.47235138 -0.024087543
## Ben McLemore             -0.30783252  0.245872619
## Jabari Parker            -2.47210802  3.877981214
## Jeremy Lamb               0.44879947 -2.590888791
## Willie Cauley-Stein      -2.67767592 -4.026223965
## Brandon Miller            1.94836905  2.026234708
## Klay Thompson             3.79667554 -0.207371725
## Andrew Wiggins           -0.40394987  2.619085299
## Rui Hachimura            -0.93351176 -2.636481966
## Nerlens Noel             -5.52149230  0.801447386
## Isaac Okoro              -0.45583166  0.276546204
## Miles Bridges             0.43665877 -0.849012232
## Devin Booker              1.24174959 -3.484009154
## Luke Kennard              2.73577444 -0.919701787
## Wes Johnson              -1.59432891  1.355658714
## Gordon Hayward            0.63712837 -0.809277017
## Tyler Herro               1.75186786 -0.416344735
## Steven Adams             -3.71212322 -2.800032293
## Markelle Fultz            1.98389283  6.289069268
## Tristan Thompson          4.06381690 -2.262810456
## Johnny Davis              1.29838193 -0.145475806
## Denzel Valentine          3.02166326 -3.193070266
## Chet Holmgren            -4.24371197  0.005887613
## Domantas Sabonis         -1.73763383 -1.176518418
## Ben Simmons              -3.63357550  6.421547748
## Moses Moody               1.43065318  1.445225669
# K-means clustering of `df_cluster`: first plot three diagnostics for
# choosing the number of clusters, then fit the final model and plot it.
#
# Fix the RNG seed up front. kmeans() draws its starting centers at random and
# the gap statistic relies on bootstrap resampling, so without a seed the
# diagnostic curves and the cluster assignments can differ between runs.
set.seed(42)

# Diagnostic 1: within-cluster sum of squares ("elbow") for each candidate k.
fviz_nbclust(df_cluster, FUNcluster = kmeans, method = "wss")

# Diagnostic 2: average silhouette width for each candidate k.
fviz_nbclust(df_cluster, FUNcluster = kmeans, method = "silhouette")

# Diagnostic 3: gap statistic for each candidate k.
fviz_nbclust(df_cluster, FUNcluster = kmeans, method = "gap_stat")

# Number of clusters used for the final fit.
k <- 6

# nstart = 50 tries 50 random sets of starting centers and keeps the best fit,
# which makes the result less sensitive to a poor initialisation.
df_cbb.kmeans <- kmeans(df_cluster, centers = k, nstart = 50)

# Plot the observations coloured by assigned cluster; repel = TRUE spreads the
# point labels so they overlap less.
fviz_cluster(df_cbb.kmeans, data = df_cluster, labelsize = 5, repel = TRUE)